Install the following packages if needed:
pip install Cython pycocotools scikit-image
import numpy as np
from pycocotools.coco import COCO
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
from matplotlib.patches import Polygon, Rectangle
import skimage.io as io
import skimage.draw as draw
%matplotlib inline
%load_ext autoreload
%autoreload 2
import sys
sys.path.insert(0, "../../")
from lshist.histogram import operations, Histogram1D, HElement
from lshist.executor import Parser, Evaluator
from lshist.utils import E
Download the dataset from the COCO website:
# Locations of the COCO val2017 annotations/images and the image inspected below.
ANNOT_FILE_PATH = "datasets/annotations/instances_val2017.json"
IMAGE_PATH = "datasets/val2017"
IMAGE_ID = 404484

coco = COCO(ANNOT_FILE_PATH)
img_meta = coco.loadImgs(ids=[IMAGE_ID])[0]
img_meta
I = io.imread("{}/{}".format(IMAGE_PATH, img_meta["file_name"]))
plt.imshow(I)
plt.show()

# Category catalogue of the dataset.
cats = coco.loadCats(coco.getCatIds())
len_cats = len(cats)
print("Single category:\n{}\n".format(cats[0]))
print("All categories:\n{}\n".format(" ".join([cat["name"] for cat in cats])))
print("Total number of categories: {}".format(len_cats))

# Annotations of the selected image (crowd and non-crowd alike).
img_anns_id = coco.getAnnIds(imgIds=IMAGE_ID, iscrowd=None)
img_anns = coco.loadAnns(img_anns_id)
print(img_anns[0])

# Outline every annotated object on top of the image.
seg_polys = list()
poly_colors = list()
for seg in img_anns:
    # Crowd annotations store an RLE dict instead of a list of polygons, so
    # they cannot be turned into Polygon patches (same rule as create_object_mask).
    if seg["iscrowd"] != 0:
        continue
    color = (np.random.random((1, 3))*0.5+0.5).tolist()[0]
    # An object may consist of several polygons; draw all of them in one colour.
    for seg_ in seg["segmentation"]:
        poly_colors.append(color)
        seg_polys.append(Polygon(np.array(seg_).reshape((-1, 2)), fill=False))
fig, ax = plt.subplots(1)
ax.imshow(I)
p_objs = PatchCollection(seg_polys, facecolor=poly_colors, edgecolor=poly_colors, alpha=0.6, linewidths=2)
ax.add_collection(p_objs)
plt.show()
def generate_positional_grid_1d(num_x, num_y):
    """Build a grid of equally sized cells in relative (0..1) coordinates.

    Cells are listed row by row and numbered from ``"e1"``.

    Args:
        num_x: Number of cells per row.
        num_y: Number of rows.

    Returns:
        A list of dicts with keys ``"id"`` (``"e<k>"``) and ``"pos"``, a tuple
        ``(x, y, width, height)`` expressed as fractions of the image size.
    """
    return [
        {
            "id": "e{}".format(row * num_x + col + 1),
            "pos": (col / num_x, row / num_y, 1 / num_x, 1 / num_y),
        }
        for row in range(num_y)
        for col in range(num_x)
    ]
def get_positional_grid_1d(width, height, elements):
    """Scale relative grid cells to absolute pixel boxes.

    Args:
        width: Image width in pixels.
        height: Image height in pixels.
        elements: Cells whose ``"pos"`` is a relative
            ``(x, y, width, height)`` tuple.

    Returns:
        A new list of dicts with the same ``"id"`` and ``"pos"`` converted to
        ``(x_start, y_start, x_end, y_end)``; the input is not modified.
    """
    elements_abs = []
    for el in elements:
        rel_x, rel_y, rel_w, rel_h = el["pos"][:4]
        x_start = rel_x * width
        y_start = rel_y * height
        box = (x_start, y_start, x_start + rel_w * width, y_start + rel_h * height)
        elements_abs.append({"id": el["id"], "pos": box})
    return elements_abs
# Resolution of the positional grid: 5 columns by 5 rows.
GRID_X_SPLITS = 5
GRID_Y_SPLITS = 5
# Relative grid, independent of any image size.
grid = generate_positional_grid_1d(GRID_X_SPLITS, GRID_Y_SPLITS)
grid[:2]
# The same grid scaled to the size of the current image.
position_elements = get_positional_grid_1d(img_meta["width"], img_meta["height"], grid)
position_elements[:5]
# Lookup from cell id to its absolute box, and the set of all cell ids.
position_converter = dict((cell["id"], cell["pos"]) for cell in position_elements)
Up = set(position_converter)
Show the positional element along with the initial image:
def show_positional_grid(I, elements, position_converter):
    """Overlay the outlines and ids of the given grid cells on an image.

    Args:
        I: Image array passed to ``imshow``.
        elements: Iterable of cell ids to draw.
        position_converter: Mapping from cell id to its absolute
            ``(x_start, y_start, x_end, y_end)`` box.
    """
    fig, ax = plt.subplots(1)
    ax.imshow(I)
    for el in elements:
        x_start, y_start, x_end, y_end = position_converter[el]
        ax.add_patch(Rectangle(xy=(x_start, y_start),
                               width=x_end - x_start, height=y_end - y_start,
                               fill=False, label=el, edgecolor="red", linewidth=2))
        # Write the cell id at the centre of its box.
        ax.text(0.5 * (x_start + x_end), 0.5 * (y_start + y_end), el,
                horizontalalignment="center", verticalalignment="center",
                fontsize=15, color="red")
    plt.show()
# Draw every grid cell (all ids in Up) over the image, labelled with its id.
show_positional_grid(I, Up, position_converter)
def create_position_mask(width, height, position_elements):
    """Build a per-pixel map of grid-cell ids.

    Args:
        width: Mask width in pixels.
        height: Mask height in pixels.
        position_elements: Cells with an ``"id"`` and an absolute ``"pos"``
            box ``(x_start, y_start, x_end, y_end)``.

    Returns:
        An object array of shape ``(height, width)`` holding the id of the
        cell covering each pixel. Pixels covered by no cell keep the integer
        ``0``. Where boxes share a border, the later element wins.
    """
    # The builtin ``object`` replaces the ``np.object`` alias removed in NumPy 1.24.
    pos_mask = np.zeros((height, width), dtype=object)
    for element in position_elements:
        x_start, y_start, x_end, y_end = (int(v) for v in element["pos"][:4])
        # Both corners are inclusive (as with skimage.draw.rectangle(start, end=...)),
        # and the box is clipped to the mask bounds.
        row_lo = max(0, min(y_start, y_end))
        row_hi = max(0, min(height, max(y_start, y_end) + 1))
        col_lo = max(0, min(x_start, x_end))
        col_hi = max(0, min(width, max(x_start, x_end) + 1))
        pos_mask[row_lo:row_hi, col_lo:col_hi] = element["id"]
    return pos_mask
# Build the per-pixel position mask for the current image.
pos_mask = create_position_mask(img_meta["width"], img_meta["height"], position_elements)
pos_mask
# Cell ids are strings such as "e7"; strip the "e" to get integers that can be plotted.
convpos2int = np.vectorize(lambda x: int(x.strip("e")))
plt.imshow(convpos2int(pos_mask))
plt.show()
parser = Parser()
# Example of one high-level positional element: a "+"-joined expression of cell ids.
Ep_center = E("e7+e8+e9+e12+e13+e14+e17+e18+e19")
Ep_center_set = parser.parse_set(Ep_center.value)
Ep_center_set
# Definition of high-level positional elements
Ep_top = E("e1+e2+e3+e4+e5+e6+e7+e8+e9+e10")
Ep_bottom = E("e16+e17+e18+e19+e20+e21+e22+e23+e24+e25")
Ep_left = E("e1+e2+e6+e7+e11+e12+e16+e17+e21+e22")
Ep_right = E("e4+e5+e9+e10+e14+e15+e19+e20+e24+e25")
Ep_center = E("e7+e8+e9+e12+e13+e14+e17+e18+e19")
Eps = [("top", Ep_top), ("bottom", Ep_bottom), ("left", Ep_left), ("right", Ep_right), ("center", Ep_center)]
# Sets of high-level positional elements (they will be used for the Evaluator below)
Eps_set = { name: parser.parse_set(Ep.value) for name, Ep in Eps}
Eps_set["center"]
Show a grid of the high-level element along with the initial image:
# Draw only the cells that make up the "center" element.
show_positional_grid(I, Eps_set["center"], position_converter)
Show the high-level element based on the position mask:
def show_positional_elements(I, pos_mask, elements):
    """Highlight the pixels of an image that fall inside the given grid cells.

    Args:
        I: Image array; its first two dimensions are rows and columns.
        pos_mask: Per-pixel cell ids, indexed ``[row, column]``.
        elements: Collection of cell ids to highlight.
    """
    # The builtin ``int`` replaces the ``np.int`` alias removed in NumPy 1.24.
    mask = np.zeros((I.shape[0], I.shape[1], 3), dtype=int)
    for y in range(I.shape[0]):
        for x in range(I.shape[1]):
            if pos_mask[y, x] in elements:
                # Copy the original pixel; everything else stays black.
                mask[y, x] = I[y, x]
    fig, ax = plt.subplots(1)
    ax.imshow(I)
    ax.imshow(mask, alpha=0.5)
    plt.show()
# Highlight the pixels that belong to the "center" element.
show_positional_elements(I, pos_mask, Eps_set["center"])
cats[:2]
# All category ids, as strings.
Uo = {str(cat["id"]) for cat in cats}
def create_object_mask(width, height, img_anns):
    """Rasterise polygon annotations into a per-pixel category mask.

    Args:
        width: Mask width in pixels.
        height: Mask height in pixels.
        img_anns: COCO-style annotation dicts with ``"iscrowd"``,
            ``"segmentation"`` and ``"category_id"`` keys.

    Returns:
        An object array of shape ``(height, width)`` holding the category id
        (as a string) of the object covering each pixel, or ``"null"`` where
        there is none. Later annotations overwrite earlier ones.
    """
    # The builtin ``object`` replaces the ``np.object`` alias removed in NumPy 1.24.
    obj_mask = np.full((height, width), fill_value="null", dtype=object)
    for ann in img_anns:
        # Only non-crowd annotations are drawn; their segmentation is a list of polygons.
        if ann["iscrowd"] != 0:
            continue
        label = str(ann["category_id"])
        # An object can be split into several polygons; fill every part.
        for flat_poly in ann["segmentation"]:
            vertices = np.array(flat_poly).reshape((-1, 2))
            # Vertices are (x, y) pairs; ``shape`` clips the fill to the mask.
            rows, cols = draw.polygon(vertices[:, 1], vertices[:, 0], shape=obj_mask.shape)
            obj_mask[rows, cols] = label
    return obj_mask
# Rasterise the annotations of the current image into a per-pixel category mask.
obj_mask = create_object_mask(img_meta["width"], img_meta["height"], img_anns)
obj_mask
# "null" marks pixels without an object: map it to 0 and category ids to ints for plotting.
convobj2int = np.vectorize(lambda label: int(label.strip("null") or 0))
plt.imshow(convobj2int(obj_mask))
plt.show()
# Lookups between category names and ids.
catid_by_name = dict((cat["name"], cat["id"]) for cat in cats)
catid_by_name["person"]
catname_by_id = dict((cat["id"], cat["name"]) for cat in cats)
catname_by_id[1]
# A single category used as an object element.
Eo_person = E(str(catid_by_name["person"]))
Eo_person_set = parser.parse_set(Eo_person.value)
Eo_person_set
# One object element per category, keyed by category name.
Eos_set = dict((cat["name"], parser.parse_set(E(str(cat["id"])).value)) for cat in cats)
Eos_set["person"]
# A high-level object element combining two categories.
Eo_pet = E("+".join(str(catid_by_name[name]) for name in ("dog", "cat")))
Eo_pet_set = parser.parse_set(Eo_pet.value)
Eo_pet_set
Eos_set["pet"] = Eo_pet_set
Eos_set["pet"]
Show a high-level element along with the initial image:
def show_object_segment(I, elements, image_objects):
    """Draw the segmentation polygons of the selected categories over an image.

    Args:
        I: Image array passed to ``imshow``.
        elements: Collection of category ids (as strings) to draw.
        image_objects: COCO-style annotation dicts of the image.
    """
    seg_polys = []
    poly_colors = []
    for seg in image_objects:
        if str(seg["category_id"]) not in elements:
            continue
        # Crowd annotations store an RLE dict rather than polygons; skip them,
        # as create_object_mask does.
        if seg["iscrowd"] != 0:
            continue
        color = (np.random.random((1, 3))*0.5+0.5).tolist()[0]
        # An object can be split into several polygons; draw all in one colour.
        for seg_ in seg["segmentation"]:
            poly_colors.append(color)
            seg_polys.append(Polygon(np.array(seg_).reshape((-1, 2)), fill=False))
    fig, ax = plt.subplots(1)
    ax.imshow(I)
    p_objs = PatchCollection(seg_polys, facecolor=poly_colors, edgecolor=poly_colors, alpha=0.6, linewidths=2)
    ax.add_collection(p_objs)
    plt.show()
# Reload the annotations of the selected image and outline the "pet" objects.
img_anns_id = coco.getAnnIds(imgIds=IMAGE_ID, iscrowd=None)
img_anns = coco.loadAnns(img_anns_id)
show_object_segment(I, Eos_set["pet"], img_anns)
Show the high-level element based on the object mask:
def show_object_elements(I, obj_mask, elements):
    """Colour the pixels of an image that belong to the given categories.

    Args:
        I: Image array; its first two dimensions are rows and columns.
        obj_mask: Per-pixel category ids, indexed ``[row, column]``.
        elements: Collection of category ids to highlight; each gets its own
            random colour.
    """
    # The builtin ``int`` replaces the ``np.int`` alias removed in NumPy 1.24.
    mask = np.zeros((I.shape[0], I.shape[1], 3), dtype=int)
    colors = {el: np.random.randint(0, 255, 3) for el in elements}
    for y in range(I.shape[0]):
        for x in range(I.shape[1]):
            label = obj_mask[y, x]
            if label in elements:
                mask[y, x] = colors[label]
    fig, ax = plt.subplots(1)
    ax.imshow(I)
    ax.imshow(mask, alpha=0.5)
    plt.show()
# Colour the pixels whose category belongs to the "pet" element.
show_object_elements(I, obj_mask, Eos_set["pet"])
def create_histogram(width, height, pos_mask, obj_mask):
    """Count object pixels per (grid cell, category) pair.

    Args:
        width: Image width in pixels.
        height: Image height in pixels.
        pos_mask: Per-pixel grid-cell ids, indexed ``[row, column]``.
        obj_mask: Per-pixel category ids, with ``"null"`` for no object.

    Returns:
        A ``Histogram1D`` keyed by ``(cell id, category id)`` after
        ``normalize(width * height)`` has been applied.
    """
    hist = Histogram1D(data=None)
    for col in range(width):
        for row in range(height):
            category = obj_mask[row, col]
            if category == "null":
                continue  # pixel without an object
            key = (pos_mask[row, col], category)
            if key not in hist:
                hist[key] = HElement(key, 0)
            hist[key].value += 1
    hist.normalize(width * height)
    return hist
# Histogram of the current image.
hist = create_histogram(img_meta["width"], img_meta["height"], pos_mask, obj_mask)
hist.to_dict()
# High-level elements by position in the element key.
high_level_elements = {
    0: Eps_set,  # positions
    1: Eos_set,  # objects
}
evaluator = Evaluator(operations, hist, high_level_elements=high_level_elements)
# Two (position, object) elements used in the examples below.
POS1 = "center"
OBJ1 = "person"
POS2 = "left"
OBJ2 = "dog"
E1 = E(POS1, OBJ1)
E2 = E(POS2, OBJ2)
E1_expr = parser.parse_string(E1.value)
HE1 = evaluator.eval(E1_expr)
print("Expression for E1:\n{}".format(E1.value))
print("\nThe parsed expression for E1 in the postfix notation:\n{}".format(E1_expr))
print("\nHistogram of E1 given the image:\n{}".format(HE1.to_dict()))
print("\nValue of presence for E1:\n{}".format(HE1.sum()))
def show_elements(I, pos_mask, obj_mask, pos_elements, obj_elements, title=None):
    """Highlight a positional region and the selected objects inside it.

    Args:
        I: Image array; its first two dimensions are rows and columns.
        pos_mask: Per-pixel grid-cell ids, indexed ``[row, column]``.
        obj_mask: Per-pixel category ids, indexed ``[row, column]``.
        pos_elements: Collection of cell ids forming the region.
        obj_elements: Collection of category ids to colour.
        title: Optional figure title.
    """
    # The builtin ``int`` replaces the ``np.int`` alias removed in NumPy 1.24.
    mask = np.zeros((I.shape[0], I.shape[1], 3), dtype=int)
    colors = {el: np.random.randint(0, 255, 3) for el in obj_elements}
    for y in range(I.shape[0]):
        for x in range(I.shape[1]):
            if pos_mask[y, x] in pos_elements:
                mask[y, x] = I[y, x]
                # NOTE(review): the object test is nested in the position test
                # so that only objects inside the region are coloured; the
                # original nesting could not be recovered from the flattened
                # source. Confirm this matches the intended behaviour.
                if obj_mask[y, x] in obj_elements:
                    mask[y, x] = colors[obj_mask[y, x]]
    fig, ax = plt.subplots(1)
    if title:
        fig.suptitle(title)
    ax.imshow(I)
    ax.imshow(mask, alpha=0.5)
    plt.show()
def show_elements_by_HE(I, pos_mask, obj_mask, HE, title=None):
    """Highlight the pixels that contribute to an evaluated histogram.

    Args:
        I: Image array; its first two dimensions are rows and columns.
        pos_mask: Per-pixel grid-cell ids, indexed ``[row, column]``.
        obj_mask: Per-pixel category ids, indexed ``[row, column]``.
        HE: Object whose ``to_dict()`` is keyed by ``(cell id, category id)``.
        title: Optional figure title.
    """
    # The builtin ``int`` replaces the ``np.int`` alias removed in NumPy 1.24.
    mask = np.zeros((I.shape[0], I.shape[1], 3), dtype=int)
    elements = HE.to_dict().keys()
    pos_elements = {el[0] for el in elements}
    obj_elements = {el[1] for el in elements}
    colors = {el: np.random.randint(0, 255, 3) for el in obj_elements}
    for y in range(I.shape[0]):
        for x in range(I.shape[1]):
            if pos_mask[y, x] in pos_elements:
                mask[y, x] = I[y, x]
                # A matching (cell, category) pair always has its cell in
                # pos_elements, so nesting this test does not change the result.
                if (pos_mask[y, x], obj_mask[y, x]) in elements:
                    mask[y, x] = colors[obj_mask[y, x]]
    fig, ax = plt.subplots(1)
    if title:
        fig.suptitle(title)
    ax.imshow(I)
    ax.imshow(mask, alpha=0.5)
    plt.show()
show_elements(I, pos_mask, obj_mask, Eps_set[POS1], Eos_set[OBJ1], title="E1")
show_elements_by_HE(I, pos_mask, obj_mask, HE1, title="E1")

E2_expr = parser.parse_string(E2.value)
HE2 = evaluator.eval(E2_expr)
print("Expression for E2:\n{}".format(E2.value))
print("\nThe parsed expression for E2 in the postfix notation:\n{}".format(E2_expr))
print("\nHistogram of E2 given the image:\n{}".format(HE2.to_dict()))
print("\nValue of presence for E2:\n{}".format(HE2.sum()))
show_elements(I, pos_mask, obj_mask, Eps_set[POS2], Eos_set[OBJ2], title="E2")
show_elements_by_HE(I, pos_mask, obj_mask, HE2, title="E2")


def _evaluate_and_report(label, expression):
    """Parse, evaluate, print and plot one combined expression.

    Args:
        label: Name used in the printed text and as the figure title.
        expression: Combined expression; its ``value`` is parsed.

    Returns:
        A tuple ``(parsed expression, evaluated histogram)``.
    """
    expr = parser.parse_string(expression.value)
    HE = evaluator.eval(expr)
    print("Expression for {}:\n{}".format(label, expression))
    print("\nThe parsed expression for {} in the postfix notation:\n{}".format(label, expr))
    print("\nHistogram of {} given the image:\n{}".format(label, HE.to_dict()))
    print("\nValue of presence for {}:\n{}".format(label, HE.sum()))
    show_elements_by_HE(I, pos_mask, obj_mask, HE, title=label)
    return expr, HE


E_union = E1 + E2
E_union_expr, HE_union = _evaluate_and_report("E_union", E_union)

E_intersect = E1 * E2 # or E1.Intersection(E2)
E_intersect_expr, HE_intersect = _evaluate_and_report("E_intersect", E_intersect)

E_sub = E1 - E2 # or E1.Sub(E2)
E_sub_expr, HE_sub = _evaluate_and_report("E_sub", E_sub)

E_and = E1 & E2 # or E1.And(E2)
E_and_expr, HE_and = _evaluate_and_report("E_and", E_and)

E_or = E1 | E2 # or E1.Or(E2)
E_or_expr, HE_or = _evaluate_and_report("E_or", E_or)

E_xor = E1 ^ E2 # or E1.Xor(E2)
E_xor_expr, HE_xor = _evaluate_and_report("E_xor", E_xor)
# TODO
Serialize the histogram objects:
# import time
# from IPython.display import clear_output
# LIMIT = 5000
# start_tick = time.time()
# hists = list()
# for indx, (img_id, img_meta) in enumerate(coco.imgs.items()):
# if indx == LIMIT:
# break
# img_anns = coco.imgToAnns[img_id]
# position_elements = get_positional_grid_1d(img_meta["width"], img_meta["height"], grid)
# pos_mask = create_position_mask(img_meta["width"], img_meta["height"], position_elements)
# obj_mask = create_object_mask(img_meta["width"], img_meta["height"], img_anns)
# hist = create_histogram(img_meta["width"], img_meta["height"], pos_mask, obj_mask)
# hists.append((img_id, hist))
# clear_output(wait=True)
# print("Current image index: {}".format(indx))
# # if indx % 100 == 0:
# # print("Current image index: {}".format(indx))
# delta_tick = time.time() - start_tick
# print("Total time: {}s".format(delta_tick))
# print("Time per image: {}s".format(delta_tick / LIMIT))
# with open("imagehist.pickle", "wb") as f:
# import pickle
# pickle.dump(hists, f, pickle.HIGHEST_PROTOCOL)
Deserialize the histogram of images:
import pickle

# NOTE: unpickling can execute arbitrary code. Only load a file that you
# generated yourself (e.g. with the serialization cell of this notebook).
with open("imagehist.pickle", "rb") as f:
    hists = pickle.load(f)
Define your query:
# Two (position, object) elements combined with "&" (the And operation shown above).
query = E("left", "dog") & E("center", "person")
Retrieve images using the query:
def retrieve(query, hists, topN=10, lastN=None, threshold=0.001):
    """Rank images by how strongly their histogram matches a query.

    Args:
        query: Expression whose ``value`` is parsed with the module-level
            ``parser``.
        hists: Iterable of ``(image id, histogram)`` pairs.
        topN: Number of best-scoring images to return.
        lastN: If an int, also return this many lowest-scoring images.
        threshold: Images scoring at or below this value are discarded.

    Returns:
        ``img_rank[:topN]``, a list of ``(image id, score)`` sorted by
        decreasing score; or a tuple ``(top, last)`` when ``lastN`` is an int.
    """
    expr = parser.parse_string(query.value)
    scored = []
    for img_id, hist in hists:
        # Evaluate once per image and reuse the score for filtering and ranking.
        score = evaluator.eval(expr, hist).sum()
        if score > threshold:
            scored.append((img_id, score))
    img_rank = sorted(scored, key=lambda item: -item[1])
    if isinstance(lastN, int):
        # A plain ``img_rank[-lastN:]`` would return the whole list for lastN == 0.
        tail_start = max(len(img_rank) - lastN, 0)
        return img_rank[:topN], img_rank[tail_start:]
    return img_rank[:topN]
# Default number of images shown, and number of images per row.
IMAGE_LIMIT = 11
IMAGE_CLMNS = 5


def show_retrieved_images(img_rank, img_paths, limit=None):
    """Show the top retrieved images in a grid with rank, id and score.

    Args:
        img_rank: List of ``(image id, score)`` pairs, best first.
        img_paths: Image file paths in the same order as ``img_rank``.
        limit: Maximum number of images to show; defaults to ``IMAGE_LIMIT``.
            It is capped at the number of images actually available.
    """
    available = min(len(img_rank), len(img_paths))
    # Never ask for more images than were retrieved, even with an explicit limit.
    img_limit = min(limit if limit else IMAGE_LIMIT, available)
    if img_limit <= 0:
        return  # nothing to show; an empty subplot grid cannot be created
    row_num = -(-img_limit // IMAGE_CLMNS)  # ceiling division
    fig, axs = plt.subplots(row_num, IMAGE_CLMNS, figsize=(15, 4*row_num), squeeze=False)
    for indx, ax in enumerate(axs.flat):
        if indx >= img_limit:
            # Remove the unused axes of the last row.
            fig.delaxes(ax)
            continue
        ax.imshow(io.imread(img_paths[indx]))
        ax.set_title("rank={}\nid={}\nscore={:0.4f}".format(indx+1,
                                                            img_rank[indx][0],
                                                            img_rank[indx][1]))
    plt.tight_layout()
    plt.show()
# Keep the 20 best-scoring images for the query.
TOP_N = 20
img_rank = retrieve(query, hists, topN=TOP_N)
img_rank
Show the retrieved images:
# Resolve each ranked image id (first item of every pair) to its file path.
img_paths = ["{}/{}".format(IMAGE_PATH, coco.imgs[img_meta_[0]]["file_name"]) for img_meta_ in img_rank]
img_paths[:1]
show_retrieved_images(img_rank, img_paths, limit=TOP_N)